

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
%matplotlib inline
# Ball-by-ball delivery table, read from a machine-specific absolute path.
df1 = pd.read_csv(
    filepath_or_buffer="C:\\Users\\HP\\Downloads\\archive (1)\\ipl\\IPL Ball-by-Ball 2008-2020.csv"
)
# Match-level table, read from a machine-specific absolute path.
df2 = pd.read_csv(
    filepath_or_buffer="C:\\Users\\HP\\Downloads\\archive (1)\\IPL Matches 2008-2020.csv"
)
df2
| id | city | date | player_of_match | venue | neutral_venue | team1 | team2 | toss_winner | toss_decision | winner | result | result_margin | eliminator | method | umpire1 | umpire2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 335982 | Bangalore | 2008-04-18 | BB McCullum | M Chinnaswamy Stadium | 0 | Royal Challengers Bangalore | Kolkata Knight Riders | Royal Challengers Bangalore | field | Kolkata Knight Riders | runs | 140.0 | N | NaN | Asad Rauf | RE Koertzen |
| 1 | 335983 | Chandigarh | 2008-04-19 | MEK Hussey | Punjab Cricket Association Stadium, Mohali | 0 | Kings XI Punjab | Chennai Super Kings | Chennai Super Kings | bat | Chennai Super Kings | runs | 33.0 | N | NaN | MR Benson | SL Shastri |
| 2 | 335984 | Delhi | 2008-04-19 | MF Maharoof | Feroz Shah Kotla | 0 | Delhi Daredevils | Rajasthan Royals | Rajasthan Royals | bat | Delhi Daredevils | wickets | 9.0 | N | NaN | Aleem Dar | GA Pratapkumar |
| 3 | 335985 | Mumbai | 2008-04-20 | MV Boucher | Wankhede Stadium | 0 | Mumbai Indians | Royal Challengers Bangalore | Mumbai Indians | bat | Royal Challengers Bangalore | wickets | 5.0 | N | NaN | SJ Davis | DJ Harper |
| 4 | 335986 | Kolkata | 2008-04-20 | DJ Hussey | Eden Gardens | 0 | Kolkata Knight Riders | Deccan Chargers | Deccan Chargers | bat | Kolkata Knight Riders | wickets | 5.0 | N | NaN | BF Bowden | K Hariharan |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 811 | 1216547 | Dubai | 2020-09-28 | AB de Villiers | Dubai International Cricket Stadium | 0 | Royal Challengers Bangalore | Mumbai Indians | Mumbai Indians | field | Royal Challengers Bangalore | tie | NaN | Y | NaN | Nitin Menon | PR Reiffel |
| 812 | 1237177 | Dubai | 2020-11-05 | JJ Bumrah | Dubai International Cricket Stadium | 0 | Mumbai Indians | Delhi Capitals | Delhi Capitals | field | Mumbai Indians | runs | 57.0 | N | NaN | CB Gaffaney | Nitin Menon |
| 813 | 1237178 | Abu Dhabi | 2020-11-06 | KS Williamson | Sheikh Zayed Stadium | 0 | Royal Challengers Bangalore | Sunrisers Hyderabad | Sunrisers Hyderabad | field | Sunrisers Hyderabad | wickets | 6.0 | N | NaN | PR Reiffel | S Ravi |
| 814 | 1237180 | Abu Dhabi | 2020-11-08 | MP Stoinis | Sheikh Zayed Stadium | 0 | Delhi Capitals | Sunrisers Hyderabad | Delhi Capitals | bat | Delhi Capitals | runs | 17.0 | N | NaN | PR Reiffel | S Ravi |
| 815 | 1237181 | Dubai | 2020-11-10 | TA Boult | Dubai International Cricket Stadium | 0 | Delhi Capitals | Mumbai Indians | Delhi Capitals | bat | Mumbai Indians | wickets | 5.0 | N | NaN | CB Gaffaney | Nitin Menon |
816 rows × 17 columns
df2.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 816 entries, 0 to 815 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 816 non-null int64 1 city 803 non-null object 2 date 816 non-null object 3 player_of_match 812 non-null object 4 venue 816 non-null object 5 neutral_venue 816 non-null int64 6 team1 816 non-null object 7 team2 816 non-null object 8 toss_winner 816 non-null object 9 toss_decision 816 non-null object 10 winner 812 non-null object 11 result 812 non-null object 12 result_margin 799 non-null float64 13 eliminator 812 non-null object 14 method 19 non-null object 15 umpire1 816 non-null object 16 umpire2 816 non-null object dtypes: float64(1), int64(2), object(14) memory usage: 108.5+ KB
df2.head()
| id | city | date | player_of_match | venue | neutral_venue | team1 | team2 | toss_winner | toss_decision | winner | result | result_margin | eliminator | method | umpire1 | umpire2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 335982 | Bangalore | 2008-04-18 | BB McCullum | M Chinnaswamy Stadium | 0 | Royal Challengers Bangalore | Kolkata Knight Riders | Royal Challengers Bangalore | field | Kolkata Knight Riders | runs | 140.0 | N | NaN | Asad Rauf | RE Koertzen |
| 1 | 335983 | Chandigarh | 2008-04-19 | MEK Hussey | Punjab Cricket Association Stadium, Mohali | 0 | Kings XI Punjab | Chennai Super Kings | Chennai Super Kings | bat | Chennai Super Kings | runs | 33.0 | N | NaN | MR Benson | SL Shastri |
| 2 | 335984 | Delhi | 2008-04-19 | MF Maharoof | Feroz Shah Kotla | 0 | Delhi Daredevils | Rajasthan Royals | Rajasthan Royals | bat | Delhi Daredevils | wickets | 9.0 | N | NaN | Aleem Dar | GA Pratapkumar |
| 3 | 335985 | Mumbai | 2008-04-20 | MV Boucher | Wankhede Stadium | 0 | Mumbai Indians | Royal Challengers Bangalore | Mumbai Indians | bat | Royal Challengers Bangalore | wickets | 5.0 | N | NaN | SJ Davis | DJ Harper |
| 4 | 335986 | Kolkata | 2008-04-20 | DJ Hussey | Eden Gardens | 0 | Kolkata Knight Riders | Deccan Chargers | Deccan Chargers | bat | Kolkata Knight Riders | wickets | 5.0 | N | NaN | BF Bowden | K Hariharan |
df2.shape # number of rows and columns
(816, 17)
df2.shape[0] # number of rows
816
df2.shape[1] # number of columns
17
# Name of all the columns(labels)
df2.columns
Index(['id', 'city', 'date', 'player_of_match', 'venue', 'neutral_venue',
'team1', 'team2', 'toss_winner', 'toss_decision', 'winner', 'result',
'result_margin', 'eliminator', 'method', 'umpire1', 'umpire2'],
dtype='object')
df2['city'].unique()
array(['Bangalore', 'Chandigarh', 'Delhi', 'Mumbai', 'Kolkata', 'Jaipur',
'Hyderabad', 'Chennai', 'Cape Town', 'Port Elizabeth', 'Durban',
'Centurion', 'East London', 'Johannesburg', 'Kimberley',
'Bloemfontein', 'Ahmedabad', 'Cuttack', 'Nagpur', 'Dharamsala',
'Kochi', 'Indore', 'Visakhapatnam', 'Pune', 'Raipur', 'Ranchi',
'Abu Dhabi', nan, 'Rajkot', 'Kanpur', 'Bengaluru', 'Dubai',
'Sharjah'], dtype=object)
df2['team1'].unique()
array(['Royal Challengers Bangalore', 'Kings XI Punjab',
'Delhi Daredevils', 'Mumbai Indians', 'Kolkata Knight Riders',
'Rajasthan Royals', 'Deccan Chargers', 'Chennai Super Kings',
'Kochi Tuskers Kerala', 'Pune Warriors', 'Sunrisers Hyderabad',
'Gujarat Lions', 'Rising Pune Supergiants',
'Rising Pune Supergiant', 'Delhi Capitals'], dtype=object)
df2['team2'].unique()
array(['Kolkata Knight Riders', 'Chennai Super Kings', 'Rajasthan Royals',
'Royal Challengers Bangalore', 'Deccan Chargers',
'Kings XI Punjab', 'Delhi Daredevils', 'Mumbai Indians',
'Kochi Tuskers Kerala', 'Pune Warriors', 'Sunrisers Hyderabad',
'Rising Pune Supergiants', 'Gujarat Lions',
'Rising Pune Supergiant', 'Delhi Capitals'], dtype=object)
df2['venue'].value_counts()
Eden Gardens 77 Feroz Shah Kotla 74 Wankhede Stadium 73 M Chinnaswamy Stadium 65 Rajiv Gandhi International Stadium, Uppal 64 MA Chidambaram Stadium, Chepauk 57 Sawai Mansingh Stadium 47 Punjab Cricket Association Stadium, Mohali 35 Dubai International Cricket Stadium 33 Sheikh Zayed Stadium 29 Punjab Cricket Association IS Bindra Stadium, Mohali 21 Maharashtra Cricket Association Stadium 21 Sharjah Cricket Stadium 18 Dr DY Patil Sports Academy 17 Subrata Roy Sahara Stadium 17 M.Chinnaswamy Stadium 15 Kingsmead 15 Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium 13 Sardar Patel Stadium, Motera 12 SuperSport Park 12 Brabourne Stadium 11 Saurashtra Cricket Association Stadium 10 Himachal Pradesh Cricket Association Stadium 9 Holkar Cricket Stadium 9 New Wanderers Stadium 8 Barabati Stadium 7 Newlands 7 St George's Park 7 JSCA International Stadium Complex 7 Shaheed Veer Narayan Singh International Stadium 6 Nehru Stadium 5 Green Park 4 Buffalo Park 3 Vidarbha Cricket Association Stadium, Jamtha 3 De Beers Diamond Oval 3 OUTsurance Oval 2 Name: venue, dtype: int64
# Trimmed copy of the match table without the date, neutral_venue and method
# columns; df2 itself is left unchanged.
df2_new = df2.drop(columns=['date', 'neutral_venue', 'method'])
df2_new.head()
| id | city | player_of_match | venue | team1 | team2 | toss_winner | toss_decision | winner | result | result_margin | eliminator | umpire1 | umpire2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 335982 | Bangalore | BB McCullum | M Chinnaswamy Stadium | Royal Challengers Bangalore | Kolkata Knight Riders | Royal Challengers Bangalore | field | Kolkata Knight Riders | runs | 140.0 | N | Asad Rauf | RE Koertzen |
| 1 | 335983 | Chandigarh | MEK Hussey | Punjab Cricket Association Stadium, Mohali | Kings XI Punjab | Chennai Super Kings | Chennai Super Kings | bat | Chennai Super Kings | runs | 33.0 | N | MR Benson | SL Shastri |
| 2 | 335984 | Delhi | MF Maharoof | Feroz Shah Kotla | Delhi Daredevils | Rajasthan Royals | Rajasthan Royals | bat | Delhi Daredevils | wickets | 9.0 | N | Aleem Dar | GA Pratapkumar |
| 3 | 335985 | Mumbai | MV Boucher | Wankhede Stadium | Mumbai Indians | Royal Challengers Bangalore | Mumbai Indians | bat | Royal Challengers Bangalore | wickets | 5.0 | N | SJ Davis | DJ Harper |
| 4 | 335986 | Kolkata | DJ Hussey | Eden Gardens | Kolkata Knight Riders | Deccan Chargers | Deccan Chargers | bat | Kolkata Knight Riders | wickets | 5.0 | N | BF Bowden | K Hariharan |
df2_new.shape
(816, 14)
# Null map of the original match table (df2, not the trimmed df2_new):
# one cell per value, coloured by whether it is missing.
match_nulls = df2.isna()
sns.heatmap(match_nulls, cmap='viridis', cbar=False)
plt.show()
We observe that the yellow cells mark the null values.
df2_new.isnull().sum()
id 0 city 13 player_of_match 4 venue 0 team1 0 team2 0 toss_winner 0 toss_decision 0 winner 4 result 4 result_margin 17 eliminator 4 umpire1 0 umpire2 0 dtype: int64
We observe that the "city" value is missing only for rows whose venue is 'Sharjah Cricket Stadium' or 'Dubai International Cricket Stadium'. Hence we fill those null values with Sharjah and Dubai, respectively.
# Fill each missing city from its venue with one vectorised lookup.
# Only the two venues named here are mapped; a missing city at any other
# venue stays NaN (and shows up in the next null count) instead of being
# silently labelled 'Dubai'.
venue_city = {
    'Sharjah Cricket Stadium': 'Sharjah',
    'Dubai International Cricket Stadium': 'Dubai',
}
df2_new['city'] = df2_new['city'].fillna(df2_new['venue'].map(venue_city))
df2_new['city'].unique()
array(['Bangalore', 'Chandigarh', 'Delhi', 'Mumbai', 'Kolkata', 'Jaipur',
'Hyderabad', 'Chennai', 'Cape Town', 'Port Elizabeth', 'Durban',
'Centurion', 'East London', 'Johannesburg', 'Kimberley',
'Bloemfontein', 'Ahmedabad', 'Cuttack', 'Nagpur', 'Dharamsala',
'Kochi', 'Indore', 'Visakhapatnam', 'Pune', 'Raipur', 'Ranchi',
'Abu Dhabi', 'Sharjah', 'Dubai', 'Rajkot', 'Kanpur', 'Bengaluru'],
dtype=object)
df2_new.isnull().sum()
id 0 city 0 player_of_match 4 venue 0 team1 0 team2 0 toss_winner 0 toss_decision 0 winner 4 result 4 result_margin 17 eliminator 4 umpire1 0 umpire2 0 dtype: int64
df2_new[df2_new['player_of_match'].isnull()]
| id | city | player_of_match | venue | team1 | team2 | toss_winner | toss_decision | winner | result | result_margin | eliminator | umpire1 | umpire2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 241 | 501265 | Delhi | NaN | Feroz Shah Kotla | Delhi Daredevils | Pune Warriors | Delhi Daredevils | bat | NaN | NaN | NaN | NaN | SS Hazare | RJ Tucker |
| 486 | 829763 | Bangalore | NaN | M Chinnaswamy Stadium | Royal Challengers Bangalore | Rajasthan Royals | Rajasthan Royals | field | NaN | NaN | NaN | NaN | JD Cloete | PG Pathak |
| 511 | 829813 | Bangalore | NaN | M Chinnaswamy Stadium | Royal Challengers Bangalore | Delhi Daredevils | Royal Challengers Bangalore | field | NaN | NaN | NaN | NaN | HDPK Dharmasena | K Srinivasan |
| 744 | 1178424 | Bengaluru | NaN | M.Chinnaswamy Stadium | Royal Challengers Bangalore | Rajasthan Royals | Rajasthan Royals | field | NaN | NaN | NaN | NaN | UV Gandhe | NJ Llong |
We observe that these rows have no values for "player_of_match", "winner", "result", "result_margin" or "eliminator".
All of these values are missing because of rain.
Hence we mark these matches as "No result".
def result(df=None):
    """Label matches that have no recorded result.

    Every row whose ``result`` is missing gets the text "No result" in
    ``winner`` and ``result``. ``result_margin`` is set to 0 rather than to
    text: writing a string into that float column would turn it into an
    object column and drop it from the numeric plots.

    Args:
        df: DataFrame to update in place. It must have ``winner``, ``result``
            and ``result_margin`` columns. Defaults to the notebook-level
            ``df2_new`` so the existing ``result()`` call keeps working.

    Returns:
        None. The frame is modified in place.
    """
    frame = df2_new if df is None else df
    no_result = frame['result'].isna()
    frame.loc[no_result, ['winner', 'result']] = "No result"
    frame.loc[no_result, 'result_margin'] = 0
result()
# Give the two remaining text columns the same "No result" placeholder
# wherever they are missing.
for text_column in ('player_of_match', 'eliminator'):
    df2_new[text_column] = df2_new[text_column].fillna("No result")
df2_new[df2_new['result_margin'].isnull()]
| id | city | player_of_match | venue | team1 | team2 | toss_winner | toss_decision | winner | result | result_margin | eliminator | umpire1 | umpire2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 66 | 392190 | Cape Town | YK Pathan | Newlands | Kolkata Knight Riders | Rajasthan Royals | Kolkata Knight Riders | field | Rajasthan Royals | tie | NaN | Y | MR Benson | M Erasmus |
| 130 | 419121 | Chennai | J Theron | MA Chidambaram Stadium, Chepauk | Chennai Super Kings | Kings XI Punjab | Chennai Super Kings | field | Kings XI Punjab | tie | NaN | Y | K Hariharan | DJ Harper |
| 328 | 598004 | Hyderabad | GH Vihari | Rajiv Gandhi International Stadium, Uppal | Sunrisers Hyderabad | Royal Challengers Bangalore | Royal Challengers Bangalore | bat | Sunrisers Hyderabad | tie | NaN | Y | AK Chaudhary | S Ravi |
| 341 | 598017 | Bangalore | V Kohli | M Chinnaswamy Stadium | Royal Challengers Bangalore | Delhi Daredevils | Royal Challengers Bangalore | field | Royal Challengers Bangalore | tie | NaN | Y | M Erasmus | VA Kulkarni |
| 416 | 729315 | Abu Dhabi | JP Faulkner | Sheikh Zayed Stadium | Kolkata Knight Riders | Rajasthan Royals | Rajasthan Royals | bat | Rajasthan Royals | tie | NaN | Y | Aleem Dar | AK Chaudhary |
| 476 | 829741 | Ahmedabad | SE Marsh | Sardar Patel Stadium, Motera | Rajasthan Royals | Kings XI Punjab | Kings XI Punjab | field | Kings XI Punjab | tie | NaN | Y | M Erasmus | S Ravi |
| 610 | 1082625 | Rajkot | KH Pandya | Saurashtra Cricket Association Stadium | Gujarat Lions | Mumbai Indians | Gujarat Lions | bat | Mumbai Indians | tie | NaN | Y | AK Chaudhary | CB Gaffaney |
| 705 | 1175365 | Delhi | PP Shaw | Feroz Shah Kotla | Delhi Capitals | Kolkata Knight Riders | Delhi Capitals | field | Delhi Capitals | tie | NaN | Y | AY Dandekar | Nitin Menon |
| 746 | 1178426 | Mumbai | JJ Bumrah | Wankhede Stadium | Mumbai Indians | Sunrisers Hyderabad | Mumbai Indians | bat | Mumbai Indians | tie | NaN | Y | CK Nandan | S Ravi |
| 757 | 1216493 | Dubai | MP Stoinis | Dubai International Cricket Stadium | Delhi Capitals | Kings XI Punjab | Kings XI Punjab | field | Delhi Capitals | tie | NaN | Y | AK Chaudhary | Nitin Menon |
| 776 | 1216512 | Abu Dhabi | LH Ferguson | Sheikh Zayed Stadium | Kolkata Knight Riders | Sunrisers Hyderabad | Sunrisers Hyderabad | field | Kolkata Knight Riders | tie | NaN | Y | PG Pathak | S Ravi |
| 781 | 1216517 | Dubai | KL Rahul | Dubai International Cricket Stadium | Mumbai Indians | Kings XI Punjab | Mumbai Indians | bat | Kings XI Punjab | tie | NaN | Y | Nitin Menon | PR Reiffel |
| 811 | 1216547 | Dubai | AB de Villiers | Dubai International Cricket Stadium | Royal Challengers Bangalore | Mumbai Indians | Mumbai Indians | field | Royal Challengers Bangalore | tie | NaN | Y | Nitin Menon | PR Reiffel |
We observe that the remaining NaN values in 'result_margin' belong to matches whose result was a tie.
So we fill these NaN values with 0.
# Tied matches have no margin, so record them as 0.
# Coerce to numeric first: any text placeholder already sitting in the column
# becomes NaN and is then filled with 0 as well, so the column ends up with a
# numeric dtype and is picked up by the numeric plots further down.
df2_new['result_margin'] = pd.to_numeric(df2_new['result_margin'], errors='coerce').fillna(0)
df2_new.isnull().sum()
id 0 city 0 player_of_match 0 venue 0 team1 0 team2 0 toss_winner 0 toss_decision 0 winner 0 result 0 result_margin 0 eliminator 0 umpire1 0 umpire2 0 dtype: int64
# Split the matches by how they were won; both subsets feed the margin
# histograms later in the notebook.
won_by = df2_new['result']
runs = df2_new.loc[won_by == 'runs']
wickets = df2_new.loc[won_by == 'wickets']
# Number of matches per toss decision. The column is passed as the keyword
# `x` because seaborn only accepts `data` positionally from version 0.12
# (see the FutureWarning recorded below this cell).
sns.countplot(x=df2_new.toss_decision)
C:\Users\HP\anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
<AxesSubplot:xlabel='toss_decision', ylabel='count'>
# Number of matches per value of `result`. The column is passed as the
# keyword `x` because seaborn only accepts `data` positionally from 0.12.
plt.figure(figsize=(14,8))
sns.countplot(x=df2_new.result)
C:\Users\HP\anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
<AxesSubplot:xlabel='result', ylabel='count'>
df2_new["winner"].value_counts()
Mumbai Indians 120 Chennai Super Kings 106 Kolkata Knight Riders 99 Royal Challengers Bangalore 91 Kings XI Punjab 88 Rajasthan Royals 81 Delhi Daredevils 67 Sunrisers Hyderabad 66 Deccan Chargers 29 Delhi Capitals 19 Gujarat Lions 13 Pune Warriors 12 Rising Pune Supergiant 10 Kochi Tuskers Kerala 6 Rising Pune Supergiants 5 No result 4 Name: winner, dtype: int64
df2_new["toss_winner"].value_counts()
Mumbai Indians 106 Kolkata Knight Riders 98 Chennai Super Kings 97 Royal Challengers Bangalore 87 Rajasthan Royals 87 Kings XI Punjab 85 Delhi Daredevils 80 Sunrisers Hyderabad 57 Deccan Chargers 43 Pune Warriors 20 Delhi Capitals 20 Gujarat Lions 15 Kochi Tuskers Kerala 8 Rising Pune Supergiants 7 Rising Pune Supergiant 6 Name: toss_winner, dtype: int64
# Bar chart of how many tosses each team won, most frequent first.
# value_counts() is computed once and reused for both axes.
toss_counts = df2_new['toss_winner'].value_counts()
x = toss_counts.keys()
y = toss_counts
plt.figure(figsize=(20,5))
# x and y are passed by keyword: seaborn 0.12+ no longer accepts them
# positionally. Plain values are used for the heights so nothing depends on
# how seaborn aligns a Series index.
team = sns.barplot(x=x, y=y.values)
plt.xlabel('Team',fontsize=15)
plt.ylabel('Count',fontsize=15)
plt.title('Number of times a team won the toss',pad=15,fontsize=15)
team.set_xticklabels(rotation=90,labels=x,fontsize=15)
C:\Users\HP\anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
[Text(0, 0, 'Mumbai Indians'), Text(1, 0, 'Kolkata Knight Riders'), Text(2, 0, 'Chennai Super Kings'), Text(3, 0, 'Royal Challengers Bangalore'), Text(4, 0, 'Rajasthan Royals'), Text(5, 0, 'Kings XI Punjab'), Text(6, 0, 'Delhi Daredevils'), Text(7, 0, 'Sunrisers Hyderabad'), Text(8, 0, 'Deccan Chargers'), Text(9, 0, 'Pune Warriors'), Text(10, 0, 'Delhi Capitals'), Text(11, 0, 'Gujarat Lions'), Text(12, 0, 'Kochi Tuskers Kerala'), Text(13, 0, 'Rising Pune Supergiants'), Text(14, 0, 'Rising Pune Supergiant')]
Mumbai Indians won the most tosses, followed by Kolkata Knight Riders and Chennai Super Kings.
# Share of matches per winner. The "No result" placeholder is one of the
# slices because it is stored in the winner column.
win_counts = df2_new['winner'].value_counts()
plt.figure(figsize=(12,12))
plt.pie(
    win_counts.tolist(),
    labels=win_counts.keys().tolist(),
    autopct='%0.1f%%',
)
plt.show()
# Bar chart of matches won per team, most frequent first.
# value_counts() is computed once and reused for both axes.
win_totals = df2_new['winner'].value_counts()
x = win_totals.keys()
y = win_totals
plt.figure(figsize=(20,5))
# x and y are passed by keyword: seaborn 0.12+ no longer accepts them
# positionally.
team = sns.barplot(x=x, y=y.values)
plt.xlabel('Team',fontsize=15)
plt.ylabel('Count',fontsize=15)
plt.title('Number of times a team won the match',pad=15,fontsize=15)
team.set_xticklabels(rotation=90,labels=x,fontsize=15)
C:\Users\HP\anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
[Text(0, 0, 'Mumbai Indians'), Text(1, 0, 'Chennai Super Kings'), Text(2, 0, 'Kolkata Knight Riders'), Text(3, 0, 'Royal Challengers Bangalore'), Text(4, 0, 'Kings XI Punjab'), Text(5, 0, 'Rajasthan Royals'), Text(6, 0, 'Delhi Daredevils'), Text(7, 0, 'Sunrisers Hyderabad'), Text(8, 0, 'Deccan Chargers'), Text(9, 0, 'Delhi Capitals'), Text(10, 0, 'Gujarat Lions'), Text(11, 0, 'Pune Warriors'), Text(12, 0, 'Rising Pune Supergiant'), Text(13, 0, 'Kochi Tuskers Kerala'), Text(14, 0, 'Rising Pune Supergiants'), Text(15, 0, 'No result')]
1. Mumbai Indians won the most matches, followed by Chennai Super Kings and Kolkata Knight Riders.
# Ten players with the most Player-of-the-Match awards.
# Counted on df2 (not df2_new) so the missing awards are simply dropped by
# value_counts() instead of appearing as a "No result" bar.
award_counts = df2['player_of_match'].value_counts()[0:10]
x = award_counts
y = award_counts.keys()
plt.figure(figsize=(20,5))
# Players go on the x axis and award counts on the y axis. Both are passed
# by keyword because seaborn 0.12+ no longer accepts them positionally.
sns.barplot(x=y, y=x.values)
plt.title('Top 10 Players With Most Player of Match',fontsize=15)
plt.ylabel('Awards',fontsize=15)
plt.xlabel('Player of Match',fontsize=15)
C:\Users\HP\anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
Text(0.5, 0, 'Player of Match')
# Box plot drawn straight from the cleaned match table via DataFrame.boxplot().
df2_new.boxplot()
<AxesSubplot:>
# Histograms of the cleaned match table. In the recorded run only `id` was
# drawn, i.e. it was the only column treated as numeric at this point.
df2_new.hist()
array([[<AxesSubplot:title={'center':'id'}>]], dtype=object)
# Pairwise-relationship grid for the cleaned match table.
sns.pairplot(df2_new)
<seaborn.axisgrid.PairGrid at 0x1af94811760>
# Null map of the cleaned match table, used to confirm that the fills above
# left nothing missing.
sns.heatmap(df2_new.isna(), cmap='viridis', cbar=False)
<AxesSubplot:>
We observe that there are no null values.
# Appearances per umpire: stack both umpire columns into one Series, count
# each name, and turn the counts into a two-column frame.
um = (
    pd.concat([df2_new['umpire1'], df2_new['umpire2']])
    .value_counts()
    .to_frame()
    .reset_index()
)
um.head(12)
| index | 0 | |
|---|---|---|
| 0 | S Ravi | 121 |
| 1 | HDPK Dharmasena | 94 |
| 2 | AK Chaudhary | 87 |
| 3 | C Shamshuddin | 82 |
| 4 | M Erasmus | 65 |
| 5 | CK Nandan | 57 |
| 6 | Nitin Menon | 57 |
| 7 | SJA Taufel | 55 |
| 8 | Asad Rauf | 51 |
| 9 | VA Kulkarni | 50 |
| 10 | BNJ Oxenford | 48 |
| 11 | CB Gaffaney | 47 |
# Interactive box plot of the result margin, one box per city.
fig = px.box(data_frame=df2_new, x="city", y="result_margin")
fig.show()
# Side-by-side histograms of the winning margin, sharing the y axis:
# left panel for wins by runs, right panel for wins by wickets.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10,5), sharey=True)
panels = (('Win by Runs', runs), ('Win by Wickets', wickets))
for axis, (heading, subset) in zip(ax, panels):
    sns.histplot(subset['result_margin'], ax=axis)
    axis.set_title(heading, pad=15)
    axis.set_xlabel('Margin')
sns.despine(fig=None)
df1
| id | inning | over | ball | batsman | non_striker | bowler | batsman_runs | extra_runs | total_runs | non_boundary | is_wicket | dismissal_kind | player_dismissed | fielder | extras_type | batting_team | bowling_team | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 335982 | 1 | 6 | 5 | RT Ponting | BB McCullum | AA Noffke | 1 | 0 | 1 | 0 | 0 | NaN | NaN | NaN | NaN | Kolkata Knight Riders | Royal Challengers Bangalore |
| 1 | 335982 | 1 | 6 | 6 | BB McCullum | RT Ponting | AA Noffke | 1 | 0 | 1 | 0 | 0 | NaN | NaN | NaN | NaN | Kolkata Knight Riders | Royal Challengers Bangalore |
| 2 | 335982 | 1 | 7 | 1 | BB McCullum | RT Ponting | Z Khan | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | Kolkata Knight Riders | Royal Challengers Bangalore |
| 3 | 335982 | 1 | 7 | 2 | BB McCullum | RT Ponting | Z Khan | 1 | 0 | 1 | 0 | 0 | NaN | NaN | NaN | NaN | Kolkata Knight Riders | Royal Challengers Bangalore |
| 4 | 335982 | 1 | 7 | 3 | RT Ponting | BB McCullum | Z Khan | 1 | 0 | 1 | 0 | 0 | NaN | NaN | NaN | NaN | Kolkata Knight Riders | Royal Challengers Bangalore |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 193463 | 1237181 | 1 | 12 | 5 | RR Pant | SS Iyer | NM Coulter-Nile | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | Delhi Capitals | Mumbai Indians |
| 193464 | 1237181 | 1 | 12 | 6 | RR Pant | SS Iyer | NM Coulter-Nile | 1 | 0 | 1 | 0 | 0 | NaN | NaN | NaN | NaN | Delhi Capitals | Mumbai Indians |
| 193465 | 1237181 | 1 | 13 | 1 | RR Pant | SS Iyer | KH Pandya | 0 | 1 | 1 | 0 | 0 | NaN | NaN | NaN | wides | Delhi Capitals | Mumbai Indians |
| 193466 | 1237181 | 1 | 13 | 2 | RR Pant | SS Iyer | KH Pandya | 1 | 0 | 1 | 0 | 0 | NaN | NaN | NaN | NaN | Delhi Capitals | Mumbai Indians |
| 193467 | 1237181 | 1 | 13 | 3 | SS Iyer | RR Pant | KH Pandya | 1 | 0 | 1 | 0 | 0 | NaN | NaN | NaN | NaN | Delhi Capitals | Mumbai Indians |
193468 rows × 18 columns
df1.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 193468 entries, 0 to 193467 Data columns (total 18 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 193468 non-null int64 1 inning 193468 non-null int64 2 over 193468 non-null int64 3 ball 193468 non-null int64 4 batsman 193468 non-null object 5 non_striker 193468 non-null object 6 bowler 193468 non-null object 7 batsman_runs 193468 non-null int64 8 extra_runs 193468 non-null int64 9 total_runs 193468 non-null int64 10 non_boundary 193468 non-null int64 11 is_wicket 193468 non-null int64 12 dismissal_kind 9495 non-null object 13 player_dismissed 9495 non-null object 14 fielder 6784 non-null object 15 extras_type 10233 non-null object 16 batting_team 193468 non-null object 17 bowling_team 193277 non-null object dtypes: int64(9), object(9) memory usage: 26.6+ MB
df1.shape
(193468, 18)
df1.columns
Index(['id', 'inning', 'over', 'ball', 'batsman', 'non_striker', 'bowler',
'batsman_runs', 'extra_runs', 'total_runs', 'non_boundary', 'is_wicket',
'dismissal_kind', 'player_dismissed', 'fielder', 'extras_type',
'batting_team', 'bowling_team'],
dtype='object')
df1.describe()
| id | inning | over | ball | batsman_runs | extra_runs | total_runs | non_boundary | is_wicket | |
|---|---|---|---|---|---|---|---|---|---|
| count | 1.934680e+05 | 193468.000000 | 193468.000000 | 193468.000000 | 193468.000000 | 193468.000000 | 193468.000000 | 193468.000000 | 193468.000000 |
| mean | 7.567688e+05 | 1.482131 | 9.177027 | 3.615967 | 1.240231 | 0.066414 | 1.306645 | 0.000083 | 0.049078 |
| std | 3.060971e+05 | 0.499682 | 5.676848 | 1.807128 | 1.610867 | 0.339991 | 1.598802 | 0.009094 | 0.216031 |
| min | 3.359820e+05 | 1.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 5.012270e+05 | 1.000000 | 4.000000 | 2.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 50% | 7.292970e+05 | 1.000000 | 9.000000 | 4.000000 | 1.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 |
| 75% | 1.082628e+06 | 2.000000 | 14.000000 | 5.000000 | 1.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 |
| max | 1.237181e+06 | 2.000000 | 19.000000 | 9.000000 | 6.000000 | 7.000000 | 7.000000 | 1.000000 | 1.000000 |
df1.isnull().sum()
id 0 inning 0 over 0 ball 0 batsman 0 non_striker 0 bowler 0 batsman_runs 0 extra_runs 0 total_runs 0 non_boundary 0 is_wicket 0 dismissal_kind 183973 player_dismissed 183973 fielder 186684 extras_type 183235 batting_team 0 bowling_team 191 dtype: int64
# Null map of the ball-by-ball table: one cell per value, coloured by
# whether it is missing.
delivery_nulls = df1.isna()
sns.heatmap(delivery_nulls, cmap='viridis', cbar=False)
plt.show()
We observe that the yellow cells mark the null values.
# Trimmed copy of the ball-by-ball table without the four columns that are
# null on most rows (see the null counts above); df1 itself is unchanged.
df1_new = df1.drop(
    columns=['dismissal_kind', 'player_dismissed', 'fielder', 'extras_type']
)
df1_new.head()
| id | inning | over | ball | batsman | non_striker | bowler | batsman_runs | extra_runs | total_runs | non_boundary | is_wicket | batting_team | bowling_team | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 335982 | 1 | 6 | 5 | RT Ponting | BB McCullum | AA Noffke | 1 | 0 | 1 | 0 | 0 | Kolkata Knight Riders | Royal Challengers Bangalore |
| 1 | 335982 | 1 | 6 | 6 | BB McCullum | RT Ponting | AA Noffke | 1 | 0 | 1 | 0 | 0 | Kolkata Knight Riders | Royal Challengers Bangalore |
| 2 | 335982 | 1 | 7 | 1 | BB McCullum | RT Ponting | Z Khan | 0 | 0 | 0 | 0 | 0 | Kolkata Knight Riders | Royal Challengers Bangalore |
| 3 | 335982 | 1 | 7 | 2 | BB McCullum | RT Ponting | Z Khan | 1 | 0 | 1 | 0 | 0 | Kolkata Knight Riders | Royal Challengers Bangalore |
| 4 | 335982 | 1 | 7 | 3 | RT Ponting | BB McCullum | Z Khan | 1 | 0 | 1 | 0 | 0 | Kolkata Knight Riders | Royal Challengers Bangalore |
df1_new.shape
(193468, 14)
df1_new.isnull().sum()
id 0 inning 0 over 0 ball 0 batsman 0 non_striker 0 bowler 0 batsman_runs 0 extra_runs 0 total_runs 0 non_boundary 0 is_wicket 0 batting_team 0 bowling_team 191 dtype: int64
df1_new[df1_new['bowling_team'].isnull()]
| id | inning | over | ball | batsman | non_striker | bowler | batsman_runs | extra_runs | total_runs | non_boundary | is_wicket | batting_team | bowling_team | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 57048 | 501265 | 1 | 0 | 1 | NV Ojha | DA Warner | AC Thomas | 1 | 0 | 1 | 0 | 0 | Delhi Daredevils | NaN |
| 57049 | 501265 | 1 | 0 | 2 | DA Warner | NV Ojha | AC Thomas | 0 | 1 | 1 | 0 | 0 | Delhi Daredevils | NaN |
| 57050 | 501265 | 1 | 1 | 1 | DA Warner | NV Ojha | JP Faulkner | 0 | 1 | 1 | 0 | 0 | Delhi Daredevils | NaN |
| 57051 | 501265 | 1 | 1 | 2 | NV Ojha | DA Warner | JP Faulkner | 0 | 0 | 0 | 0 | 0 | Delhi Daredevils | NaN |
| 57052 | 501265 | 1 | 1 | 3 | NV Ojha | DA Warner | JP Faulkner | 0 | 0 | 0 | 0 | 0 | Delhi Daredevils | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 115654 | 829763 | 1 | 7 | 3 | AB de Villiers | Mandeep Singh | STR Binny | 0 | 0 | 0 | 0 | 0 | Royal Challengers Bangalore | NaN |
| 115655 | 829763 | 1 | 8 | 6 | AB de Villiers | Mandeep Singh | SR Watson | 2 | 0 | 2 | 0 | 0 | Royal Challengers Bangalore | NaN |
| 115656 | 829763 | 1 | 9 | 1 | Mandeep Singh | AB de Villiers | STR Binny | 4 | 0 | 4 | 0 | 0 | Royal Challengers Bangalore | NaN |
| 115657 | 829763 | 1 | 8 | 4 | AB de Villiers | Mandeep Singh | SR Watson | 0 | 0 | 0 | 0 | 0 | Royal Challengers Bangalore | NaN |
| 115658 | 829763 | 1 | 8 | 5 | AB de Villiers | Mandeep Singh | SR Watson | 0 | 0 | 0 | 0 | 0 | Royal Challengers Bangalore | NaN |
191 rows × 14 columns
df1_new['bowling_team'].unique()
array(['Royal Challengers Bangalore', 'Kolkata Knight Riders',
'Chennai Super Kings', 'Kings XI Punjab', 'Delhi Daredevils',
'Rajasthan Royals', 'Mumbai Indians', 'Deccan Chargers',
'Kochi Tuskers Kerala', 'Pune Warriors', nan,
'Sunrisers Hyderabad', 'Rising Pune Supergiants', 'Gujarat Lions',
'Rising Pune Supergiant', 'Delhi Capitals'], dtype=object)
# Correlation matrix of the numeric ball-by-ball columns.
# The text columns (batsman, bowler, team names, ...) are excluded explicitly:
# older pandas dropped them silently, but pandas 2.x raises when corr() is
# called on a frame that still contains them.
correlation = df1_new.select_dtypes(include='number').corr()
plt.figure(figsize=(18, 18))
sns.heatmap(correlation, xticklabels=correlation.columns, yticklabels=correlation.columns, annot=True)
<AxesSubplot:>
df1_new['bowling_team'].value_counts()
Mumbai Indians 24453 Royal Challengers Bangalore 23024 Kolkata Knight Riders 22583 Kings XI Punjab 22457 Chennai Super Kings 21224 Rajasthan Royals 18972 Delhi Daredevils 18719 Sunrisers Hyderabad 14703 Deccan Chargers 9039 Pune Warriors 5394 Delhi Capitals 4012 Gujarat Lions 3540 Rising Pune Supergiant 1928 Rising Pune Supergiants 1615 Kochi Tuskers Kerala 1614 Name: bowling_team, dtype: int64
# Recover each missing bowling side from the match table instead of imputing
# the most frequent team. The bowling side is whichever of team1/team2 is not
# batting; e.g. match 501265 has Delhi Daredevils batting against Pune
# Warriors, so the mode ('Mumbai Indians') would be the wrong team.
# Assigning through .loc also avoids the chained `inplace=True` fillna, which
# newer pandas deprecates.
# NOTE(review): assumes `id` is unique in df2 - confirm.
match_teams = df2.set_index('id')[['team1', 'team2']]
no_bowling_team = df1_new['bowling_team'].isna()
first_side = df1_new.loc[no_bowling_team, 'id'].map(match_teams['team1'])
second_side = df1_new.loc[no_bowling_team, 'id'].map(match_teams['team2'])
batting_side = df1_new.loc[no_bowling_team, 'batting_team']
# team2 when team1 is batting, team1 when team2 is batting; otherwise the
# value stays NaN so an unmatched row is visible in the next null count.
df1_new.loc[no_bowling_team, 'bowling_team'] = second_side.where(
    batting_side == first_side,
    first_side.where(batting_side == second_side),
)
df1_new.isnull().sum()
id 0 inning 0 over 0 ball 0 batsman 0 non_striker 0 bowler 0 batsman_runs 0 extra_runs 0 total_runs 0 non_boundary 0 is_wicket 0 batting_team 0 bowling_team 0 dtype: int64
We observe that there are no null values.
df1_new.describe()
| id | inning | over | ball | batsman_runs | extra_runs | total_runs | non_boundary | is_wicket | |
|---|---|---|---|---|---|---|---|---|---|
| count | 1.934680e+05 | 193468.000000 | 193468.000000 | 193468.000000 | 193468.000000 | 193468.000000 | 193468.000000 | 193468.000000 | 193468.000000 |
| mean | 7.567688e+05 | 1.482131 | 9.177027 | 3.615967 | 1.240231 | 0.066414 | 1.306645 | 0.000083 | 0.049078 |
| std | 3.060971e+05 | 0.499682 | 5.676848 | 1.807128 | 1.610867 | 0.339991 | 1.598802 | 0.009094 | 0.216031 |
| min | 3.359820e+05 | 1.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 5.012270e+05 | 1.000000 | 4.000000 | 2.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 50% | 7.292970e+05 | 1.000000 | 9.000000 | 4.000000 | 1.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 |
| 75% | 1.082628e+06 | 2.000000 | 14.000000 | 5.000000 | 1.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 |
| max | 1.237181e+06 | 2.000000 | 19.000000 | 9.000000 | 6.000000 | 7.000000 | 7.000000 | 1.000000 | 1.000000 |
# Pairwise-relationship grid for the ball-by-ball table, with 1.5-inch facets.
sns.pairplot(df1_new,height=1.5)
<seaborn.axisgrid.PairGrid at 0x1afa23d35b0>
# NOTE(review): same pairplot call (same frame, same height) as the cell
# directly above - looks like a duplicate; confirm whether both are needed.
sns.pairplot(df1_new , height=1.5)
<seaborn.axisgrid.PairGrid at 0x1afa26c5e50>
# Twenty batsmen with the most rows in the ball-by-ball table, i.e. the most
# deliveries recorded with them on strike.
# The title and axis label previously read "Player of Match" (copied from the
# awards chart); they now describe what is actually counted.
delivery_counts = df1_new['batsman'].value_counts()[0:20]
x = delivery_counts
y = delivery_counts.keys()
plt.figure(figsize=(20,5))
# Batsmen go on the x axis and delivery counts on the y axis. Both are passed
# by keyword because seaborn 0.12+ no longer accepts them positionally.
sns.barplot(x=y, y=x.values)
plt.title('Top 20 Batsmen by Deliveries Faced',fontsize=15)
plt.ylabel('Deliveries',fontsize=15)
plt.xlabel('Batsman',fontsize=15)
C:\Users\HP\anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
Text(0.5, 0, 'Player of Match')
# Number of ball-by-ball rows per bowling team (up to the 20 most frequent).
# value_counts() is computed once and reused for both axes.
bowling_counts = df1_new['bowling_team'].value_counts()[0:20]
x = bowling_counts.keys()
y = bowling_counts
plt.figure(figsize=(20,5))
# x and y are passed by keyword: seaborn 0.12+ no longer accepts them
# positionally.
team = sns.barplot(x=x, y=y.values)
plt.xlabel('Team',fontsize=15)
plt.ylabel('Count',fontsize=15)
team.set_xticklabels(rotation=90,labels=x,fontsize=15)
C:\Users\HP\anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
[Text(0, 0, 'Mumbai Indians'), Text(1, 0, 'Royal Challengers Bangalore'), Text(2, 0, 'Kolkata Knight Riders'), Text(3, 0, 'Kings XI Punjab'), Text(4, 0, 'Chennai Super Kings'), Text(5, 0, 'Rajasthan Royals'), Text(6, 0, 'Delhi Daredevils'), Text(7, 0, 'Sunrisers Hyderabad'), Text(8, 0, 'Deccan Chargers'), Text(9, 0, 'Pune Warriors'), Text(10, 0, 'Delhi Capitals'), Text(11, 0, 'Gujarat Lions'), Text(12, 0, 'Rising Pune Supergiant'), Text(13, 0, 'Rising Pune Supergiants'), Text(14, 0, 'Kochi Tuskers Kerala')]